# AMAR ET AL. - COUNTERING MISINFORMATION EARLY (2025)
## REPLICATION FILE: 11_subgroup_analyses.R
### This script conducts subgroup analyses.
# ----
# Functions ----
# Function to run the regression for a subset
tab_regression_subgroup_ITT <- function(varname, covariate, design) {
  design <- design[!is.na(design$variables[[covariate]]), ]
  
  # Get unique levels of the 'endline_weeks_from_first_category' covariate
  covariate_levels <- unique(design$variables[[covariate]])
  
  # Filter out 0
  ##covariate_levels <- covariate_levels[!covariate_levels == 0]
  
  # Initialize an empty dataframe to store results
  results <- data.frame()
  
  # Loop through each level of the covariate
  for (level in covariate_levels) {
    # Filter the design for the current level
    design_sub <- design[design$variables[[covariate]] == level,]
    
    # Define the formula
    fml <- as.formula(paste(varname, "~ treatment + library_spillover_pre"))
    
    # Run the regression
    mod <- svyglm(fml, design = design_sub)
    
    # Extract the results and add relevant information
    out <- tidy(mod) %>%
      mutate(dv = varname, n = nobs(mod), covariate_level = level, .before = 1)
    
    # Append to results
    results <- bind_rows(results, out)
  }
  
  return(results)
}

# Calculate p-value of the difference
calculate_p_value_difference <- function(data) {
  results <- data.frame() # Empty data frame to store results
  
  # Loop through each unique dependent variable
  for (dv in unique(data$dv)) {
    subset_data <- data[data$dv == dv & grepl("treatment", data$term), ] # Adjust term matching
    
    # Check if there are exactly two groups to compare
    if (nrow(subset_data) == 2 && length(unique(subset_data$covariate_level)) == 2) {
      # Extract values for each group
      est_1 <- subset_data$estimate[1]
      est_2 <- subset_data$estimate[2]
      se_1 <- subset_data$std.error[1]
      se_2 <- subset_data$std.error[2]
      n_1 <- subset_data$n[1]
      n_2 <- subset_data$n[2]
      
      # Calculate t-statistic for the difference in estimates
      t_stat <- (est_1 - est_2) / sqrt(se_1^2 + se_2^2)
      
      # Degrees of freedom (approximated as sum of sample sizes minus 2)
      df <- n_1 + n_2 - 2
      
      # Calculate two-sided p-value for the t-test
      p_value <- 2 * pt(abs(t_stat), df, lower.tail = FALSE)
      
      # Add the result to the results data frame
      results <- rbind(results, data.frame(
        dv = dv,
        t_statistic = t_stat,
        p_value_difference = p_value,
        estimate_diff = est_1 - est_2,
        covariate_levels = paste(unique(subset_data$covariate_level), collapse = ", "),
        n_1 = n_1,
        n_2 = n_2
      ))
    }
  }
  return(results)
}

# Conduct analyses ----
## Gender ----
### Figure ----
subgroup_itt_gender_levels <- bimli_svy_dkr.rm %>%
  # lapply to process each dependent variable
  {lapply(dv_labs_endline_main$dv[is.na(dv_labs_endline_main$subidx)], function(x) tab_regression_subgroup_ITT(x, "gender", .))} %>%
  bind_rows() %>% # Combine all results
  mutate(margin = qnorm(0.975) * std.error,
         lower = estimate - margin,
         upper = estimate + margin) %>%
  filter(term != "(Intercept)" & !grepl("library_spillover_pre", term))

subgroup_itt_gender_levels <- left_join(dv_labs_endline_main, subgroup_itt_gender_levels, by = "dv") %>%
  filter(!is.na(estimate)) %>%
  filter(is.na(subidx) & secondary_outcome == FALSE & mechanism == FALSE)

p_value_itt_gender <- calculate_p_value_difference(subgroup_itt_gender_levels)

# Prepare the plot data by joining p-values with the main data and filtering only main indices
plot_data_subgroup_gender <- subgroup_itt_gender_levels %>%
  left_join(p_value_itt_gender, by = "dv") %>%
  filter(str_detect(label, "Index"))  # Only plot main indices

# Prepare annotation data for p-values (no brackets), positioning each p-value to the right of the highest estimate within each facet
annotations_subgroup_gender <- plot_data_subgroup_gender %>%
  group_by(dv, idx) %>%
  summarise(
    y_min = 1,                              # Start of bracket at y = 1
    y_max = 2,                              # End of bracket at y = 2
    p_value_label = ifelse(p_value_difference[1] < 0.01, 
                           "  p < 0.01",
                           paste0("  p = ", round(p_value_difference[1], 2))),
    x_position = max(upper) + 0.02          # Position to the right of the highest estimate within each facet
  )

# Create the plot with p-values and brackets to the right of the highest estimate within each facet
plot_data_subgroup_gender %>%
  mutate(
    label = if_else(str_detect(label, "Index"), "Index", label),
    label = as_factor(label),
    idx = as_factor(idx),
    is.idx = if_else(label == "Index", TRUE, FALSE),
    significance = ifelse(p.value < 0.05, "YES", "NO"),
    covariate_level = case_when(covariate_level == "Boy" ~ "Boys",
                                covariate_level == "Girl" ~ "Girls",
                                TRUE ~ NA_character_)
  ) %>%
  ggplot(aes(x = estimate, y = fct_rev(covariate_level), linetype = significance, color = covariate_level)) +
  geom_vline(xintercept = 0, lty = 2) +
  geom_point(position = position_dodge(width = 1)) +
  geom_errorbar(aes(xmin = lower, xmax = upper), width = 0, position = position_dodge(width = 1)) +
  facet_grid(rows = vars(idx), scales = "free_y", space = "free_y", switch = "y") +
  xlab("Estimated Subgroup ITT") +
  scale_y_discrete(position = "right") +
  scale_color_manual(values = c("Girls" = "#2066a8", "Boys" = "#ea801c"), name = "") +
  xlim(-0.2, 0.5) +
  scale_linetype_manual(values = c("dashed", "solid"), guide = "none") +
  theme_bw() %+replace%
  theme(
    axis.title.y = element_blank(),
    strip.text.y.left = element_text(angle = 0),
    legend.position = "right",
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  ) +
  # Add p-value text on the right of the highest estimate within each facet
  geom_text(data = annotations_subgroup_gender,
            aes(x = x_position, y = 1.5, label = p_value_label),
            inherit.aes = FALSE, color = "grey30", size = 3, hjust = 0) +
  # Add vertical line as the main bracket from y = 1 to y = 2
  geom_segment(data = annotations_subgroup_gender,
               aes(x = x_position, xend = x_position, y = y_min, yend = y_max),
               inherit.aes = FALSE, color = "grey30", size = 0.3) +
  # Add top horizontal segment of the bracket
  geom_segment(data = annotations_subgroup_gender,
               aes(x = x_position, xend = x_position - 0.01, y = y_max, yend = y_max),
               inherit.aes = FALSE, color = "grey30", size = 0.3) +
  # Add bottom horizontal segment of the bracket
  geom_segment(data = annotations_subgroup_gender,
               aes(x = x_position, xend = x_position - 0.01, y = y_min, yend = y_min),
               inherit.aes = FALSE, color = "grey30", size = 0.3)
ggsave("output/figures/subgroup_itt_gender_levels.pdf", height = 4, width = 8)

### Engagement table ----
engagement_vars <- c("engagement_attitude", "engagement_behavior")

engagement_means_ci_df <- lapply(engagement_vars, function(var) {
  svyby(
    as.formula(paste0("~", var)),
    by = ~treatment + gender,
    design = bimli_svy_dkr.rm,
    svymean,
    vartype = "ci",
    level = 0.95,
    na.rm = TRUE
  ) %>%
    rename(mean = !!var,
           lower = ci_l,
           upper = ci_u) %>%
    mutate(variable = var)
}) %>%
  bind_rows()

engagement_table_df <- engagement_means_ci_df %>%
  mutate(
    mean_ci = sprintf("%.3f (%.3f, %.3f)", mean, lower, upper),
    Group = ifelse(treatment == "Media Literacy", "Treatment", "Control"),
    Gender = ifelse(gender == "Boy", "Boy (mean)", "Girl (mean)")
  ) %>%
  select(variable, Group, Gender, mean_ci) %>%
  rename(
    Variable = variable) %>%
  pivot_wider(names_from = Gender, values_from = mean_ci) %>%
  mutate(
    Variable = recode(Variable,
                      "engagement_attitude" = "Engagement Attitude",
                      "engagement_behavior" = "Engagement Behavior")
  ) %>%
  arrange(Variable, Group)


engagement_latex_table <- xtable(
  engagement_table_df,
  caption = "Engagement indices means by gender (95\\% CIs in parentheses)",
  label = "tab:engagement-gender"
)

print(
  engagement_latex_table,
  include.rownames = FALSE,
  sanitize.text.function = identity,
  caption.placement = "top",
  file = "output/tables/tab_engagement_means_gender.tex"
)

## Grade ----
subgroup_itt_class_levels <- bimli_svy_dkr.rm %>%
  filter(class < 13) %>%
  mutate(class = as.factor(class)) %>%
  # lapply to process each dependent variable
  {lapply(dv_labs_endline_main$dv[is.na(dv_labs_endline_main$subidx)], function(x) tab_regression_subgroup_ITT(x, "class", .))} %>%
  bind_rows() %>% # Combine all results
  mutate(margin = qnorm(0.975) * std.error,
         lower = estimate - margin,
         upper = estimate + margin) %>%
  filter(term != "(Intercept)" & !grepl("library_spillover_pre", term))

subgroup_itt_class_levels <- left_join(dv_labs_endline_main, subgroup_itt_class_levels, by = "dv") %>%
  filter(!is.na(estimate)) %>%
  filter(is.na(subidx) & secondary_outcome == FALSE & mechanism == FALSE)

# plot
subgroup_itt_class_levels %>%
  mutate(covariate_level = factor(covariate_level, 
                                  levels = c("8", "9", "10", "11", "12"))) %>%
  filter(str_detect(label, "Index")) %>% # only plot main indices
  mutate(label = if_else(str_detect(label, "Index"), "Index", label),
         label = as_factor(label),
         idx = as_factor(idx),
         is.idx = if_else(label == "Index", TRUE, FALSE),
         significance = ifelse(p.value < 0.05, "YES", "NO")) %>%
  ggplot(aes(x = estimate, y = covariate_level, linetype = significance, color = covariate_level)) +
  geom_vline(xintercept = 0, lty = 2) +
  geom_point(position = position_dodge(width = 1)) +
  geom_errorbar(aes(xmin = lower, xmax = upper), width = 0, position = position_dodge(width = 1)) +
  facet_grid(rows = "idx", scales = "free_y", space = "free_y", switch = "y") +
  xlab("Estimated Subgroup ITT") +
  scale_y_discrete(position = "right") +
  scale_color_brewer(palette = "Set1", name = "") +
  scale_linetype_manual(values = c("dashed", "solid"), guide = "none") +
  theme_bw() %+replace%
  theme(axis.title.y = element_blank(),
        strip.text.y.left = element_text(angle = 0),
        legend.position = "bottom")
ggsave("output/figures/subgroup_itt_class_levels.pdf", height = 7, width = 8)

## Guardian present during endline ----
subgroup_itt_guardian_involvement1_levels <- bimli_svy_dkr.rm %>%
  # lapply to process each dependent variable
  {lapply(dv_labs_endline_main$dv[is.na(dv_labs_endline_main$subidx)], function(x) tab_regression_subgroup_ITT(x, "guardian_involvement1", .))} %>%
  bind_rows() %>% # Combine all results
  mutate(margin = qnorm(0.975) * std.error,
         lower = estimate - margin,
         upper = estimate + margin) %>%
  filter(term != "(Intercept)" & !grepl("library_spillover_pre", term))

subgroup_itt_guardian_involvement1_levels <- left_join(dv_labs_endline_main, subgroup_itt_guardian_involvement1_levels, by = "dv") %>%
  filter(!is.na(estimate)) %>%
  filter(is.na(subidx) & secondary_outcome == FALSE & mechanism == FALSE)

p_value_itt_guardian_involvement1 <- calculate_p_value_difference(subgroup_itt_guardian_involvement1_levels)

# Prepare the plot data by joining p-values with the main data and filtering only main indices
plot_data_subgroup_guardian_involvement1 <- subgroup_itt_guardian_involvement1_levels %>%
  left_join(p_value_itt_guardian_involvement1, by = "dv") %>%
  filter(str_detect(label, "Index"))  # Only plot main indices

# Prepare annotation data for p-values (no brackets), positioning each p-value to the right of the highest estimate within each facet
annotations_subgroup_guardian_involvement1 <- plot_data_subgroup_guardian_involvement1 %>%
  group_by(dv, idx) %>%
  summarise(
    y_min = 1,                              # Start of bracket at y = 1
    y_max = 2,                              # End of bracket at y = 2
    p_value_label = ifelse(p_value_difference[1] < 0.01, 
                           "  p < 0.01",
                           paste0("  p = ", round(p_value_difference[1], 2))),
    x_position = max(upper) + 0.02          # Position to the right of the highest estimate within each facet
  )

# Create the plot with p-values and brackets to the right of the highest estimate within each facet
plot_data_subgroup_guardian_involvement1 %>%
  mutate(
    label = if_else(str_detect(label, "Index"), "Index", label),
    label = as_factor(label),
    idx = as_factor(idx),
    is.idx = if_else(label == "Index", TRUE, FALSE),
    significance = ifelse(p.value < 0.05, "YES", "NO"),
    covariate_label = if_else(covariate_level == "1", "Guardian Present", "Guardian Absent"),
  ) %>%
  ggplot(aes(x = estimate, y = covariate_label, linetype = significance, color = covariate_label)) +
  geom_vline(xintercept = 0, lty = 2) +
  geom_point(position = position_dodge(width = 1)) +
  geom_errorbar(aes(xmin = lower, xmax = upper), width = 0, position = position_dodge(width = 1)) +
  facet_grid(rows = vars(idx), scales = "free_y", space = "free_y", switch = "y") +
  xlab("Estimated Subgroup ITT") +
  scale_y_discrete(position = "right") +
  scale_color_manual(values = c("Guardian Present" = "#2066a8", "Guardian Absent" = "#ea801c"), name = "") +
  xlim(-0.2, 0.5) +
  scale_linetype_manual(values = c("dashed", "solid"), guide = "none") +
  theme_bw() %+replace%
  theme(
    axis.title.y = element_blank(),
    strip.text.y.left = element_text(angle = 0),
    legend.position = "bottom"
  ) +
  # Add p-value text on the right of the highest estimate within each facet
  geom_text(data = annotations_subgroup_guardian_involvement1,
            aes(x = x_position, y = 1.5, label = p_value_label),
            inherit.aes = FALSE, color = "grey30", size = 3, hjust = 0) +
  # Add vertical line as the main bracket from y = 1 to y = 2
  geom_segment(data = annotations_subgroup_guardian_involvement1,
               aes(x = x_position, xend = x_position, y = y_min, yend = y_max),
               inherit.aes = FALSE, color = "grey30", size = 0.3) +
  # Add top horizontal segment of the bracket
  geom_segment(data = annotations_subgroup_guardian_involvement1,
               aes(x = x_position, xend = x_position - 0.01, y = y_max, yend = y_max),
               inherit.aes = FALSE, color = "grey30", size = 0.3) +
  # Add bottom horizontal segment of the bracket
  geom_segment(data = annotations_subgroup_guardian_involvement1,
               aes(x = x_position, xend = x_position - 0.01, y = y_min, yend = y_min),
               inherit.aes = FALSE, color = "grey30", size = 0.3)
ggsave("output/figures/subgroup_itt_guardian_involvement1_levels.pdf", height = 5, width = 8)

## Attendance combinations ----
sessions <- 1:4
levels_list <- c(
  "None",
  as.character(sessions), 
  combn(sessions, 2, FUN = function(x) paste(x, collapse = ", ")),
  combn(sessions, 3, FUN = function(x) paste(x, collapse = ", ")),
  paste(sessions, collapse = ", ")
)

subgroup_itt_compliance_combo_unadj <- bimli_svy_dkr.rm %>%
  mutate(
    compliance_comb = factor(ifelse(
      session1 + session2 + session3 + session4 == 0,
      "None",
      gsub(
        "\\s+", ", ",
        trimws(
          paste(
            ifelse(session1 == 1, "1", ""),
            ifelse(session2 == 1, "2", ""),
            ifelse(session3 == 1, "3", ""),
            ifelse(session4 == 1, "4", "")
          )
        )
      )
    ), levels = c(levels_list))) %>%
  # lapply to process each dependent variable
  {lapply(c("index_of_indices"), function(x) tab_regression_subgroup_ITT(x, "compliance_comb", .))} %>%
  bind_rows() %>% # Combine all results
  mutate(margin = qnorm(0.975) * std.error,
         lower = estimate - margin,
         upper = estimate + margin) %>%
  filter(term != "(Intercept)" & !grepl("library_spillover_pre", term)) %>%
  mutate(model = "Unadjusted") %>%
  mutate(covariate_level = factor(covariate_level,
                                  levels = levels_list,
                                  labels = c("None", "1", "2", "3", "4",
                                             "1, 2", "1, 3", "1, 4", 
                                             "2, 3", "2, 4", "3, 4",
                                             "1, 2, 3", "1, 2, 4", 
                                             "1, 3, 4", "2, 3, 4",
                                             "1, 2, 3, 4"))) %>%
  mutate(covariate_group = case_when(
    covariate_level %in% c("None") ~ "0",
    covariate_level %in% c("1", "2", "3", "4") ~ "1",
    covariate_level %in% c("1, 2", "1, 3", "1, 4",
                           "2, 3", "2, 4",
                           "3, 4") ~ "2",
    covariate_level %in% c("1, 2, 3", "1, 2, 4",
                           "1, 3, 4", "2, 3, 4") ~ "3",
    covariate_level %in% c("1, 2, 3, 4") ~ "4"))

# Create the plot with p-values and brackets to the right of the highest estimate within each facet
subgroup_itt_compliance_combo_unadj %>%
  mutate(
    significance = ifelse(p.value < 0.05, "YES", "NO")) %>%
  ggplot(aes(x = estimate, y = covariate_level, color = covariate_group, linetype = significance)) +
  geom_vline(xintercept = 0, lty = 2) +
  geom_point(position = position_dodge(width = 1)) +
  geom_errorbar(aes(xmin = lower, xmax = upper), width = 0, position = position_dodge(width = 1)) +
  xlab("Estimated Subgroup ITT on Index of Indices") +
  scale_y_discrete(position = "right") +
  scale_linetype_manual(values = c("dashed", "solid"), guide = "none") +
  #scale_color_manual(values = c("Adjusted" = "#2066a8", "Unadjusted" = "#ea801c")) + 
  labs(color = "Sessions Attended") + # Remove legend title for 'model'
  theme_bw() %+replace%
  theme(
    axis.title.y = element_blank(),
    strip.text.y.left = element_text(angle = 0),
    legend.position = "bottom")
ggsave("output/figures/subgroup_itt_attendance_combinations.pdf", height = 6, width = 8)

## Follow-up timing (weeks from first) ----
subgroup_itt_follow_weeks_from_first_levels_adj <- bimli_svy_dkr.rm %>%
  # lapply to process each dependent variable
  {lapply(follow_up_dvs$dv[follow_up_dvs$type == "idx"], function(x) tab_regression_subgroup_ITT(x, "follow_weeks_from_first", .))} %>%
  bind_rows() %>% # Combine all results
  mutate(margin = qnorm(0.975) * std.error,
         lower = estimate - margin,
         upper = estimate + margin) %>%
  filter(term != "(Intercept)" & !grepl("library_spillover_pre", term)) %>%
  mutate(model = "Adjusted")

subgroup_itt_follow_weeks_from_first_levels <- left_join(follow_up_dvs, rbind(subgroup_itt_follow_weeks_from_first_levels_adj), by = "dv") %>%
  filter(!is.na(estimate)) %>%
  filter(is.na(subidx) & secondary_outcome == FALSE & mechanism == FALSE) %>%
  filter(type == "idx")

# Create the plot with p-values and brackets to the right of the highest estimate within each facet
subgroup_itt_follow_weeks_from_first_levels %>%
  mutate(
    label = if_else(str_detect(label, "Index"), "Index", label),
    label = as_factor(label),
    idx = as_factor(idx),
    is.idx = if_else(label == "Index", TRUE, FALSE),
    significance = ifelse(p.value < 0.05, "YES", "NO"),
    covariate_level = factor(covariate_level,
                             levels = c("Week 4+","Week 3","Week 2","Week 1"),
                             labels = c("Week 4+","Week 3","Week 2","Week 1"))) %>%
  ggplot(aes(x = estimate, y = covariate_level, linetype = significance)) +
  geom_vline(xintercept = 0, lty = 2) +
  geom_point(position = position_dodge(width = 1)) +
  geom_errorbar(aes(xmin = lower, xmax = upper), width = 0, position = position_dodge(width = 1)) +
  facet_grid(rows = vars(idx), scales = "free_y", space = "free_y", switch = "y") +
  xlab("Estimated Subgroup ITT") +
  scale_y_discrete(position = "right") +
  scale_linetype_manual(values = c("dashed", "solid"), guide = "none") +
  #scale_color_manual(values = c("Adjusted" = "#2066a8", "Unadjusted" = "#ea801c")) + 
  labs(color = NULL) + # Remove legend title for 'model'
  theme_bw() %+replace%
  theme(
    axis.title.y = element_blank(),
    strip.text.y.left = element_text(angle = 0),
    legend.position = "bottom")
ggsave("output/figures/subgroup_itt_follow_up_weeks_from_first_levels.pdf", height = 4, width = 8)


# END of 11_subgroup_analyses.R ----